#coding:utf8
'''
#全国企业信用信息公示系统（广东）
#维护肖迪
'''
import pycurl 
import urllib
import re
from utils import kill_captcha
import StringIO
import random
from bs4 import BeautifulSoup
import requests
import json
import table
from scpy.logger import get_logger
logger = get_logger(__file__)
import time
def try_number(num):
    """Decorator factory: retry the wrapped function up to ``num`` times.

    The wrapped function is re-invoked whenever it raises any exception
    (used here to retry captcha solving, which fails often).  On success
    the total elapsed time is logged and the result returned.  After
    ``num`` failed attempts the wrapper gives up and returns None.

    :param num: maximum number of failed attempts before giving up.
    """
    def _func(func):
        def __func(*args, **kwargs):
            attempts = 0
            start_time = time.time()
            while 1:
                try:
                    result = func(*args, **kwargs)
                    # Log the total time the (eventually successful) call took.
                    logger.info(float(time.time()) - float(start_time))
                    return result
                except Exception as e:  # py2/py3-compatible form of `except Exception, e`
                    print(e)
                    print(time.time() - start_time)
                    print('*' * 100)
                    attempts += 1
                    logger.info('验证码错误')
                    print(attempts)
                    if attempts == num:
                        break
                    continue
            # Retries exhausted: fall through and implicitly return None.
        return __func
    return _func
def curl(url, data='', cookie='', debug=False):  # fetch helper [get, post]
    """Download ``url`` with pycurl and return the raw response body.

    A POST is issued when ``data`` (a dict) is supplied, otherwise GET.
    When ``cookie`` is truthy the cookie jar file is (re)written; the
    cookie file is always read, so the session persists across calls.
    Headers impersonate a desktop Chrome hitting gsxt.gdgs.gov.cn.
    """
    user_agent = "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
    buf = StringIO.StringIO()
    handle = pycurl.Curl()
    handle.setopt(handle.URL, url)
    handle.setopt(handle.REFERER, 'http://gsxt.scaic.gov.cn/')
    if cookie:
        # Persist any Set-Cookie responses into the jar file.
        handle.setopt(handle.COOKIEJAR, "cookie_file_name1")
    handle.setopt(handle.COOKIEFILE, "cookie_file_name1")
    handle.setopt(pycurl.FOLLOWLOCATION, True)
    handle.setopt(pycurl.TIMEOUT, 30)
    if data:
        handle.setopt(handle.POSTFIELDS, urllib.urlencode(data))
    handle.setopt(pycurl.ENCODING, 'gzip')
    handle.setopt(handle.HTTPHEADER, [
        'Host:gsxt.gdgs.gov.cn',
        'Upgrade-Insecure-Requests:1',
        'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
        'Origin:http://gsxt.scaic.gov.cn',
        'Referer:http://gsxt.gdgs.gov.cn/aiccips/CheckEntContext/showInfo.html',
    ])
    handle.setopt(handle.WRITEDATA, buf)
    handle.perform()
    handle.close()
    return buf.getvalue()
def curl1(url, data='', cookie='', debug=False):
    """Variant of ``curl`` targeting www.szcredit.com.cn (Shenzhen site).

    Same GET/POST + shared-cookie-file behavior as ``curl``; only the
    request headers differ (Googlebot UA, szcredit Host/Origin).
    """
    user_agent = "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"
    buf = StringIO.StringIO()
    handle = pycurl.Curl()
    handle.setopt(handle.URL, url)
    handle.setopt(pycurl.TIMEOUT, 30)
    handle.setopt(handle.REFERER, 'http://gsxt.scaic.gov.cn/')
    if cookie:
        # Persist any Set-Cookie responses into the jar file.
        handle.setopt(handle.COOKIEJAR, "cookie_file_name1")
    handle.setopt(handle.COOKIEFILE, "cookie_file_name1")
    handle.setopt(pycurl.FOLLOWLOCATION, True)
    if data:
        handle.setopt(handle.POSTFIELDS, urllib.urlencode(data))
    handle.setopt(pycurl.ENCODING, 'gzip')
    handle.setopt(handle.HTTPHEADER, [
        'Host:www.szcredit.com.cn',
        'Upgrade-Insecure-Requests:1',
        'User-Agent:Googlebot/2.1 (+http://www.googlebot.com/bot.html)',
        'Origin:http://gsxt.scaic.gov.cn',
        'Origin:http://www.szcredit.com.cn',
    ])
    handle.setopt(handle.WRITEDATA, buf)
    handle.perform()
    handle.close()
    return buf.getvalue()

@try_number(20)
def verify(name):
    """Solve the search captcha, then look up company ``name``.

    Downloads a fresh captcha image, sends it to the ``kill_captcha``
    service, submits the captcha + name to checkCode.html, and finally
    posts to showInfo.html to obtain the result list.

    :param name: company name (unicode) to search for.
    :returns: the first result row's detail-page URL, or '' when no
        result link is found in the response.
    :raises Exception: when the captcha service returns nothing/'wrong'/
        garbage, so that @try_number retries (up to 20 times).
    """
    verify_url = "http://gsxt.gdgs.gov.cn/aiccips/verify.html?random=%s" % random.random()
    verify_image = curl(verify_url, cookie=1)
    captcha = kill_captcha(verify_image, 'gd', 'png')

    # A huge "solution" means the service echoed an error page back.
    if not captcha or captcha == 'wrong' or len(captcha) > 100:
        logger.info('验证码为:%s' % captcha)
        logger.error("破解验证码的服务出现异常,可能是下载的验证码错误，也可能破解服务出现异常！")
        raise Exception("破解验证码的服务出现异常")  # raised so the retry decorator tries again

    print(captcha)
    data = {"textfield": name, "code": captcha}
    check_url = 'http://gsxt.gdgs.gov.cn/aiccips/CheckEntContext/checkCode.html'
    html = curl(check_url, data)
    html_json = json.loads(html)
    # The server normalizes the search term; echo it back on the real search.
    textfield = html_json['textfield']
    data = {"textfield": textfield, "code": captcha}
    search_url = 'http://gsxt.gdgs.gov.cn/aiccips/CheckEntContext/showInfo.html'
    html = curl(search_url, data)
    match = re.search('<li class="font16"><a href="(.+?)"', html)
    print(match)
    if match:
        return match.group(1)
    else:
        print(html)
        return ''

    
def Judgment(url):
    """Dispatch a result URL to the matching scraper.

    Shenzhen companies live on www.szcredit.com.cn (GBK-encoded pages,
    handled by ``run``); everything else is a relative provincial-site
    link handled by ``run1``.

    :param url: detail-page URL returned by ``verify``.
    :returns: whatever ``run``/``run1`` returns (the ``alldata`` dict).
    """
    if 'http://www.szcredit.com.cn/' in url:
        html = curl(url).decode('gbk').encode('utf8')
        rid = re.search('rid=(.+)', url).group(1)
        print(rid)
        # BUG FIX: run() is declared run(rid, detail_html); the two
        # arguments were passed swapped here.
        return run(rid, html)
    else:
        url = url.replace('../', 'http://gsxt.gdgs.gov.cn/aiccips/')
        html = curl(url)
        return run1(html)


def run(rid,detail_html,**args):
    """Scrape a Shenzhen (www.szcredit.com.cn) company detail page.

    :param rid: record id taken from the detail URL's ``rid=`` query arg.
    :param detail_html: the already-downloaded, utf8-re-encoded detail page.
    :param args: optional; when ``type=1`` (plus ``searchword``) a
        ``(result, alldata, companyUrl)`` tuple is returned, otherwise
        just ``alldata`` — a dict of all parsed tables.

    Throughout this function an unusual idiom is used: ``try: print x``
    probes whether ``x`` was ever assigned — if the matching table was
    absent the print raises NameError and the except installs a default.
    """
    # url = re.findall('action="(.+?)"',detail_html)[0]
    # url = 'http://www.szcredit.com.cn/web/GSZJGSPT/'+url
    companyData = detail_html
    url = 'http://www.szcredit.com.cn/web/GSZJGSPT/QyxyDetail.aspx?rid='+ rid

    # ASP.NET postback tokens; spaces are stripped first because the page
    # formats the attributes inconsistently.
    viewstate = re.findall('id="__VIEWSTATE"value="(.+?)"',detail_html.replace(' ',''))[0]
    rator = re.findall('id="__VIEWSTATEGENERATOR"value="(.+?)"',detail_html.replace(' ',''))[0]
    # print '#'*80
    # print viewstate
    # print rator
    # print '#'*80

    # Fire the two ASP.NET Timer postbacks that lazily load the
    # punishment ("xingzhengchufa") and change ("biangengxinxi") tables.
    head = {'Host':'www.szcredit.com.cn','Origin':'http://www.szcredit.com.cn','User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36','X-MicrosoftAjax':'Delta=true'}
    data1 = {"ScriptManager1":"xingzhengchufa|Timer1","__EVENTTARGET":"Timer1","__EVENTARGUMENT":"","__VIEWSTATE":viewstate,"__VIEWSTATEGENERATOR":rator,"__ASYNCPOST":"true","":""}
    html1 = requests.post(url,data = data1,headers=head).content.decode('gbk').encode('utf8')

    #open('/Users/xiaodi/Desktop/1.html','w').write(html1)
    data2 = {"ScriptManager1":"biangengxinxi|Timer2","__EVENTTARGET":"Timer2","__EVENTARGUMENT":"","__VIEWSTATE":viewstate,"__VIEWSTATEGENERATOR":rator,"__ASYNCPOST":"true","":""}
    html2 = requests.post(url,data=data2,headers=head).content.decode('gbk').encode('utf8')
    #open('/Users/xiaodi/Desktop/2.html','w').write(html2)
    #print html2
    # Everything below parses the concatenation of the main page and the
    # two postback fragments.
    detail_html = detail_html+html1+html2


    tables = re.findall('<table[\s\S]+?</table>',detail_html)
    for j in tables:
        # Table title from its centered <th>; whitespace is stripped from
        # the fragment first, hence the space-free pattern.
        word = re.findall('<thcolspan="\d+?"style="text-align:center;">(.+?)</th>', j.replace(' ', '').replace('\n', '').replace('\r', '').replace('\t', ''))
        if word:
            print word[0]

        if '股东信息' in j or '投资人信息' in j:
            shareHolderList = table.index('股东信息',j)
            if shareHolderList:
                # Follow each shareholder's EntSHDetail page for the
                # subscribed-capital columns.
                for i in shareHolderList:
                    #print i['shareHolderdetail']
                    share_url = re.findall("href='(EntSHDetail\.aspx.+?)'",i['shareHolderdetail'])[0]
                    share_url = 'http://www.szcredit.com.cn/web/GSZJGSPT/'+share_url
                    html = curl(share_url)
                    html = re.findall('<table[\s\S]+?</table>',html)[0]
                    result_list = table.investment_information(html)
                    if result_list:
                        subConam = result_list[0]
                        conDate = ""
                        fundedRatio = ""
                        regCapCur = result_list[1]
                        country = ""
                    else:
                        share_url = ""
                        subConam = ""
                        conDate = ""
                        fundedRatio = ""
                        regCapCur = ""
                        country = ""
                    i['shareHolderdetail'] = share_url
                    i['subConam'] = subConam
                    i['conDate'] = conDate
                    i['fundedRatio'] = fundedRatio
                    i['regCapCur'] = regCapCur
                    i['country'] = country

        # Route each titled table to the matching parser; any parser
        # failure just skips the table.
        try:
            if '基本信息' == word[0].strip():
                basicList = table.index(word[0].replace(' ',''),j)
            #if '股东信息' == word[0]:
            #    shareHolderList = table.index('股东信息',j)
            if '主要人员信息' == word[0]:
                personList = table.index(word[0].replace(' ',''),j)
            if '变更信息' == word[0]:
                alterList = table.index(word[0].replace(' ',''),j)
            if '分支机构信息' == word[0]:
                filiationList = table.index(word[0].replace(' ',''),j)
            if '清算信息' == word[0]:
                liquidationList = table.index(word[0].replace(' ',''),j)
            if '经营异常' == word[0] or '经营异常信息' == word[0]:
                abnormalOperation = table.index(word[0].replace(' ',''),j)
            if '抽查检查信息' == word[0]:
                checkMessage = table.checkMessage(word[0].replace(' ',''),j)
        except:
            print word
            continue
    # NameError probes: default any list whose table never appeared.
    try:
        print basicList
    except:
        basicList = []
    try:
        print shareHolderList
    except:
        shareHolderList = []
    try:
        print personList
    except:
        personList = []
    try:
        print alterList
    except:
        alterList = []
    try:
        print filiationList
    except:
        filiationList = []
    try:
        print liquidationList
    except:
        liquidationList = []
    try:
        print abnormalOperation
    except:
        abnormalOperation = []
    try:
        flags = checkMessage
    except:
        checkMessage = []
    # Sections this site does not publish: returned as empty lists.
    punishBreakList = []
    punishedList = []
    alidebtList =[]
    # entinvItemList = [{"entName":"","entType":"","fundedRatio":"","currency":"","entStatus":"","canDate":"","esDate":"","regOrg":"","regCapcur":"","regCap":"","revDate":"","name":"","subConam":"","regNo":""}]
    entinvItemList = []
    frinvList =[]
    frPositionList = []
    caseInfoList = []
    sharesFrostList = []
    sharesImpawnList = []
    morDetailList = []
    morguaInfoList = []
    # Annual-report index page.
    # NOTE(review): the query string has no 'rid=' key here (compare
    # QyxyDetail.aspx?rid= above) — confirm this URL actually resolves.
    report_url = 'http://www.szcredit.com.cn/web/GSZJGSPT/QynbDetail.aspx?'+rid
    #report_url = re.findall('href="(http://www\.nmgs\.gov\.cn:7001/aiccips/BusinessAnnals/view\.html.+?)"',report_html)
    #report_url = ''
    html = curl(report_url)
    report_url = re.findall("href='(http://app02\.szaic\.gov\.cn/NB\.WebUI/WebPages/.+?)'",html)
    yearReportList = []
    yearList =[]
    for i in report_url:
        print i

        html = curl(i)
        #print html
        table_list = re.findall('<table[\s\S]+?</table>',html)
        for j in table_list:
            if '统一社会信用代码' in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_basic = table.report_basic(j)
            if '网站或网店信息' in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_website = table.report_website(j)
            if '企业资产状况信息' in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_assetsInfo = table.report_assetsInfo(j)
            if '股东及出资信息' in j:
                report_investorInformations =  table.report_investorInformations(j)
            if '股权变更信息' in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_equityChangeInformations = table.report_equityChangeInformations(j)
            if '修改记录' in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_changeRecords = table.report_changeRecords(j)
            # Same NameError-probe defaulting, once per table fragment.
            try:
                print report_basic
            except:
                report_basic = {}
            try:
                print report_website
            except:
                report_website = {}
            # NOTE(review): duplicate probe of report_basic with a []
            # default — the {} probe above already covers it.
            try:
                print report_basic
            except:
                report_basic = []
            try:
                print report_assetsInfo
            except:
                report_assetsInfo = {}
            try:
                print report_investorInformations
            except:
                report_investorInformations = []
            try:
                print report_equityChangeInformations
            except:
                report_equityChangeInformations = []
            try:
                print report_changeRecords
            except:
                 report_changeRecords = []
            # NOTE(review): group(0) keeps the 'NBYear=' prefix in `year`;
            # group(1) looks intended — confirm downstream expectations.
            year = re.search('NBYear=(.+)',i).group(0)
        dit1 = {"year":year,"baseInfo":report_basic,"website":report_website,"investorInformations":report_investorInformations,"assetsInfo":report_assetsInfo,"equityChangeInformations":report_equityChangeInformations,"changeRecords":report_changeRecords}
        dit2 = {"year":year,"html":html}
        yearReportList.append(dit1)
        yearList.append(dit2)
    alldata = {"abnormalOperation":abnormalOperation,"basicList":basicList,"shareHolderList":shareHolderList,"personList":personList,"punishBreakList":punishBreakList,"punishedList":punishedList,"alidebtList":alidebtList,"entinvItemList":entinvItemList,"frinvList":frinvList,"frPositionList":frPositionList,"alterList":alterList,"filiationList":filiationList,"caseInfoList":caseInfoList,"sharesFrostList":sharesFrostList,"sharesImpawnList":sharesImpawnList,"morDetailList":morDetailList,"morguaInfoList":morguaInfoList,"liquidationList":liquidationList,"yearReportList":yearReportList,"checkMessage":checkMessage}
    if args.get('type') == 1:
        province = "gd"
        keyword = args.get('searchword')
        companyName = basicList[0].get('enterpriseName')
        result = {"province":province,"type":0,"html":detail_html,"yearList":yearList,"keyword":keyword,"companyName":companyName}
        companyUrl = {"province":province,"methond":"post","url":url,"data":companyData,"companyName":companyName}
        return (result,alldata,companyUrl)
    return alldata
def run1(detail_html,**args):
    """Scrape a provincial-site (gsxt.gdgs.gov.cn) company detail page.

    :param detail_html: the already-downloaded detail page.
    :param args: optional; when ``type=1`` (plus ``searchword``) a
        ``(result, alldata)`` tuple is returned, otherwise just
        ``alldata`` — a dict of all parsed tables.

    Uses the same ``try: print x`` NameError-probe idiom as ``run`` to
    default variables whose tables never appeared.
    """
    # Hidden form fields identifying the company to the GSpublicity endpoints.
    entNo = re.findall('name="entNo" value="(.+?)"',detail_html)[0]
    entType = re.findall('id="entType" name="entType" value="(.+?)"',detail_html)[0]
    regOrg = re.findall('id="regOrg" name="regOrg" value="(.+?)"',detail_html)[0]
    data = {"entNo":entNo,"entType":entType,"regOrg":regOrg}
    print data
    # Annual-report index plus three extra publicity pages (spot checks,
    # abnormal operations, sampling inspections).
    report_html = curl('http://gsxt.gdgs.gov.cn/aiccips/BusinessAnnals/BusinessAnnalsList.html',data)
    html1 = curl('http://gsxt.gdgs.gov.cn/aiccips/GSpublicity/GSpublicityList.html?service=entCheckInfo',data)
    html2 = curl('http://gsxt.gdgs.gov.cn/aiccips/GSpublicity/GSpublicityList.html?service=cipUnuDirInfo',data)
    html3 = curl('http://gsxt.gdgs.gov.cn/aiccips/GSpublicity/GSpublicityList.html?service=cipSpotCheInfo',data)

    # '</br>' doubles as both concatenation glue and split token below.
    detail_html = detail_html+'</br>'+html1+'</br>'+html2+'</br>'+html3

    tables = detail_html.split('</br>')
    for j in tables:
        word = re.findall('<th colspan="\d+?" style="text-align:center;">(.+?)</th>',j)
        print word

        if '股东信息' in j:
            shareHolderList = table.index('股东信息',j)
            if shareHolderList:
                # Follow each shareholder's popup link for capital details.
                for i in shareHolderList:
                    if 'window.open' in i['shareHolderdetail']:
                        share_url = re.findall("window.open\('(.+?)'\)",i['shareHolderdetail'])[0]
                        html = curl(share_url)
                        try:
                            html = re.findall('<table[\s\S]+?</table>',html)[0]
                            result_list = table.investment_information(html)
                            if result_list:
                                subConam = result_list[0]
                                conDate = ""
                                fundedRatio = ""
                                regCapCur = result_list[1]
                                country = ""
                            else:
                                subConam = ""
                                conDate = ""
                                fundedRatio = ""
                                regCapCur = ""
                                country = ""
                        except:
                            subConam = ""
                            conDate = ""
                            fundedRatio = ""
                            regCapCur = ""
                            country = ""
                    else:
                        subConam = ""
                        conDate = ""
                        fundedRatio = ""
                        regCapCur = ""
                        country = ""
                        share_url = ""
                    i['shareHolderdetail'] = share_url
                    i['subConam'] = subConam
                    i['conDate'] = conDate
                    i['fundedRatio'] = fundedRatio
                    i['regCapCur'] = regCapCur
                    i['country'] = country

        # Route each titled fragment to its parser; parser failures skip it.
        try:
            if '基本信息' == word[0].strip():
                basicList = table.index(word[0].replace(' ',''),j)
            #if '股东信息' == word[0]:
            #    shareHolderList = table.index('股东信息',j)
            if '主要人员信息' == word[0]:
                personList = table.index(word[0].replace(' ',''),j)
            if '变更信息' == word[0]:
                alterList = table.index(word[0].replace(' ',''),j)
            if '分支机构信息' == word[0]:
                filiationList = table.index(word[0].replace(' ',''),j)
            if '清算信息' == word[0]:
                liquidationList = table.index(word[0].replace(' ',''),j)
            if '经营异常' == word[0] or '经营异常信息' == word[0]:
                abnormalOperation = table.index(word[0].replace(' ',''),j)
            if '抽查检查信息' == word[0]:
                checkMessage = table.checkMessage(word[0].replace(' ',''),j)
        except:
            print word
            continue
    # NameError probes: default any list whose table never appeared.
    try:
        print basicList
    except:
        basicList = []
    try:
        print shareHolderList
    except:
        shareHolderList = []
    try:
        print personList
    except:
        personList = []
    try:
        print alterList
    except:
        alterList = []
    try:
        print filiationList
    except:
        filiationList = []
    try:
        print liquidationList
    except:
        liquidationList = []
    try:
        print abnormalOperation
    except:
        abnormalOperation = []
    try:
        flags = checkMessage
    except:
        checkMessage = []
    # Sections this site does not publish: returned as empty lists.
    punishBreakList = []
    punishedList = []
    alidebtList =[]
    entinvItemList = []
    frinvList =[]
    frPositionList = []
    caseInfoList = []
    sharesFrostList = []
    sharesImpawnList = []
    morDetailList = []
    morguaInfoList = []

    # (url, year) pairs from the annual-report index.
    report_url = re.findall('href="(http://gsxt\.gdgs\.gov\.cn/aiccips/BusinessAnnals/view\.html.+?)">(\d+)',report_html)
    yearReportList = []
    yearList =[]
    for i in report_url:
        print i
        url = i[0]
        year = i[1]
        html = curl(url)
        #print html
        table_list = re.findall('<table[\s\S]+?</table>',html)
        for j in table_list:
            if '统一社会信用代码' in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_basic = table.report_basic(j)
            if '网站或网店信息' in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_website = table.report_website(j)
            if '企业资产状况信息' in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_assetsInfo = table.report_assetsInfo(j)
            if '股东及出资信息' in j:
                report_investorInformations =  table.report_investorInformations(j)
            if '股权变更信息' in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_equityChangeInformations = table.report_equityChangeInformations(j)
            if '修改记录'  in j:
                j = re.sub('<span.+?>','',j)
                j = j.replace('</span>','')
                report_changeRecords = table.report_changeRecords(j)
            # Same NameError-probe defaulting, once per table fragment.
            try:
                print report_basic
            except:
                report_basic = {}
            try:
                print report_website
            except:
                report_website = {}
            # NOTE(review): duplicate probe of report_basic with a []
            # default — the {} probe above already covers it.
            try:
                print report_basic
            except:
                report_basic = []
            try:
                print report_assetsInfo
            except:
                report_assetsInfo = {}
            try:
                print report_investorInformations
            except:
                report_investorInformations = []
            try:
                print report_equityChangeInformations
            except:
                report_equityChangeInformations = []
            try:
                print report_changeRecords
            except:
                 report_changeRecords = []
        dit1 = {"year":year,"baseInfo":report_basic,"website":report_website,"investorInformations":report_investorInformations,"assetsInfo":report_assetsInfo,"equityChangeInformations":report_equityChangeInformations,"changeRecords":report_changeRecords}
        dict2 = {"year":year,"html":html}
        yearReportList.append(dit1)
        yearList.append(dict2)
    alldata = {"abnormalOperation":abnormalOperation,"basicList":basicList,"shareHolderList":shareHolderList,"personList":personList,"punishBreakList":punishBreakList,"punishedList":punishedList,"alidebtList":alidebtList,"entinvItemList":entinvItemList,"frinvList":frinvList,"frPositionList":frPositionList,"alterList":alterList,"filiationList":filiationList,"caseInfoList":caseInfoList,"sharesFrostList":sharesFrostList,"sharesImpawnList":sharesImpawnList,"morDetailList":morDetailList,"morguaInfoList":morguaInfoList,"liquidationList":liquidationList,"yearReportList":yearReportList,"checkMessage":checkMessage}

    if args.get('type') == 1:
        province = "gd"
        keyword = args.get('searchword')
        companyName = basicList[0].get('enterpriseName')
        result = {"province":province,"type":0,"html":detail_html,"yearList":yearList,"keyword":keyword,"companyName":companyName}
        return (result,alldata)


    return alldata

def run2(url, **args):
    """Scrape a company hosted on the 121.8.226.101:7001 (Guangzhou) system.

    Unlike run/run1, this backend exposes paginated JSON endpoints for
    investors, changes and staff, so most data is fetched via requests
    rather than parsed out of HTML tables.

    :param url: company detail URL on the 121.8.226.101:7001 host.
    :param args: optional; when ``type=1`` (plus ``searchword``) a
        ``(html_source, alldata)`` tuple is returned, otherwise just
        ``alldata``.
    """
    detail_html = curl(url)
    # Basic-info table is the first <table> on the page.
    basictable = re.findall('<table[\s\S]+?</table>', detail_html)[0]
    basicList = table.basic(basictable)
    pripid = re.search("entityVo.pripid=(.+?)'", detail_html).group(1)

    # Shareholders: first request learns the page count, then page through.
    invent_url = 'http://121.8.226.101:7001/search/search!investorListShow?entityVo.curPage=%s&entityVo.pripid=%s' % (1, pripid)
    invent_data = requests.get(invent_url).json()
    page_size = int(invent_data['baseVo']['pageSize'])
    shareHolderList = []
    for i in range(1, page_size + 1):
        invent_url = 'http://121.8.226.101:7001/search/search!investorListShow?entityVo.curPage=%s&entityVo.pripid=%s' % (i, pripid)
        invent_data = requests.get(invent_url).json()
        for invet in invent_data['investorList']:
            dict_temp = {}
            dict_temp['shareholderName'] = invet['sinvenstorname']
            dict_temp['subConam'] = invet['subconam']
            dict_temp['regCapCur'] = invet['invcurrency']
            dict_temp['conDate'] = ''
            dict_temp['fundedRatio'] = invet['conprop']
            dict_temp['country'] = invet['invcountry']
            dict_temp['shareholderType'] = invet['cardname']
            shareHolderList.append(dict_temp)

    # Change records, paged the same way.
    alter_url = 'http://121.8.226.101:7001/search/search!changeListShow?entityVo.curPage=%s&entityVo.pripid=%s' % (1, pripid)
    alter_html = requests.get(alter_url).json()
    alter_page = int(alter_html['baseVo']['pageSize'])
    alterList = []
    for i in range(1, alter_page + 1):
        alter_url = 'http://121.8.226.101:7001/search/search!changeListShow?entityVo.curPage=%s&entityVo.pripid=%s' % (i, pripid)
        alter_data = requests.get(alter_url).json()
        for alter in alter_data['changeList']:
            data_temp = {}
            data_temp['altDate'] = alter['altdate'].replace('T', '')
            data_temp['altItem'] = alter['sname']
            data_temp['altBe'] = alter['altbe']
            data_temp['altAf'] = alter['altaf']
            alterList.append(data_temp)

    # Key personnel, paged the same way.
    personList_url = 'http://121.8.226.101:7001/search/search!staffListShow?entityVo.curPage=%s&entityVo.pripid=%s' % (1, pripid)
    personList_html = requests.get(personList_url).json()
    personList_page = int(personList_html['baseVo']['pageSize'])
    personList = []
    for i in range(1, personList_page + 1):
        person_url = 'http://121.8.226.101:7001/search/search!staffListShow?entityVo.curPage=%s&entityVo.pripid=%s' % (i, pripid)
        person_data = requests.get(person_url).json()
        for person in person_data['staffList']:
            data_temp = {}
            data_temp['position'] = person['sdutyname']
            data_temp['name'] = person['name']
            data_temp['sex'] = ''
            personList.append(data_temp)

    # Annual reports: scrape the (link, year) pairs from the listing page.
    html_year = requests.get('http://121.8.226.101:7001/search/search!enterpriseShow?entityVo.pripid=%s' % pripid).content
    year_data = re.findall('''onclick="window\.open\('(.+?)'\)">(\d+)''', html_year)
    yearList = []
    yearReportList = []
    for i in year_data:
        # Loop variable renamed from `url` to avoid shadowing the parameter.
        year_url = 'http://121.8.226.101:7001/search/' + i[0]
        year = i[1]
        html_temp = requests.get(year_url).content

        table_list = re.findall('<table[\s\S]+?</table>', html_temp)
        for j in table_list:
            if '企业基本信息' in j:
                report_basic = table.report_basic(j)
            if '网站或网店信息' in j:
                report_website = table.report_website(j)
            if '企业资产状况信息' in j:
                report_assetsInfo = table.report_assetsInfo(j)
            if '股东及出资信息' in j:
                report_investorInformations = table.report_investorInformations(j)
            if '股权变更信息' in j:
                report_equityChangeInformations = table.report_equityChangeInformations(j)
            if '修改记录' in j:
                report_changeRecords = table.report_changeRecords(j)
            # NameError probes: printing an unassigned name raises, and the
            # except installs a default for that report section.
            try:
                print(report_basic)
            except:
                report_basic = {}
            try:
                print(report_website)
            except:
                report_website = {}
            try:
                print(report_assetsInfo)
            except:
                report_assetsInfo = {}
            try:
                print(report_investorInformations)
            except:
                report_investorInformations = []
            try:
                print(report_equityChangeInformations)
            except:
                report_equityChangeInformations = []
            try:
                print(report_changeRecords)
            except:
                report_changeRecords = []
        ditSource = {"year": year, "html": html_temp}
        yearList.append(ditSource)
        dit1 = {"year": year, "baseInfo": report_basic, "website": report_website, "investorInformations": report_investorInformations, "assetsInfo": report_assetsInfo, "equityChangeInformations": report_equityChangeInformations, "changeRecords": report_changeRecords}
        yearReportList.append(dit1)
    alldata = {"abnormalOperation": '', "basicList": basicList, "shareHolderList": shareHolderList, "personList": personList, "punishBreakList": '', "punishedList": '', "alidebtList": "", "entinvItemList": "", "frinvList": "", "frPositionList": "", "alterList": alterList, "filiationList": "", "caseInfoList": "", "sharesFrostList": "", "sharesImpawnList": "", "morDetailList": "", "morguaInfoList": "", "liquidationList": "", "yearReportList": yearReportList}
    if args.get('type') == 1:
        # BUG FIX: was args.get('searchkey', ...) but every caller (and
        # run/run1) passes/reads 'searchword', so the keyword was lost.
        html_source = {"province": "gd", "type": 0, "html": detail_html, "keyword": args.get('searchword', "none"), "companyName": basicList[0]['enterpriseName'], "yearList": yearList}
        # (A dead, never-returned `companyUrl` dict was removed here; it
        # also recorded the wrong URL due to the old loop-variable shadowing.)
        return (html_source, alldata)
    return alldata
def search(key):
    """Search by company name and scrape the matched company's data.

    :param key: company name (unicode) to search for.
    :returns: the scraper's ``alldata`` dict, or () when no match found.
    """
    url = verify(key)
    #print html
    print("*" * 100)
    print(url)
    print("*" * 100)
    if url:
        if 'http://www.szcredit.com.cn/' in url:
            html = curl(url).decode('gbk').encode('utf8')
            rid = re.search('rid=(.+)', url).group(1)
            print(rid)
            # BUG FIX: run() is declared run(rid, detail_html); the two
            # arguments were passed swapped here (compare search2).
            return run(rid, html)
        if 'http://121.8.226.101:7001/' in url:
            #html = curl(url)
            return run2(url)
        else:
            url = url.replace('../', 'http://gsxt.gdgs.gov.cn/aiccips/')
            html = curl(url)
            return run1(html)
    else:
        return ()
def search2(key):
    """Search by company name and scrape, tagging results with the keyword.

    :param key: company name (unicode) to search for.
    :returns: the dispatched scraper's tuple (run: 3-tuple, run1/run2:
        2-tuple), or () when the search produced no detail URL.
    """
    detail_url = verify(key)
    print('*' * 100)
    print(detail_url)
    print('*' * 100)
    if not detail_url:
        return ()
    # Shenzhen companies: GBK page, rid-keyed backend.
    if 'http://www.szcredit.com.cn/' in detail_url:
        page = curl(detail_url).decode('gbk').encode('utf8')
        rid = re.search('rid=(.+)', detail_url).group(1)
        print(rid)
        return run(rid, page, searchword=key, type=1)
    # Guangzhou companies: JSON-backed system.
    if 'http://121.8.226.101:7001/' in detail_url:
        return run2(detail_url, searchword=key, type=1)
    # Everything else: provincial site with relative links.
    absolute_url = detail_url.replace('../', 'http://gsxt.gdgs.gov.cn/aiccips/')
    page = curl(absolute_url)
    return run1(page, searchword=key, type=1)
def search3(url,data,*args,**kwargs):
    """Scrape starting from an already-known detail URL (no captcha step).

    :param url: company detail-page URL.
    :param data: dict carrying at least "searchword" (the original query).
    :param kwargs: expected to carry the page HTML under 'data' for the
        szcredit branch — but see the NOTE below.
    """
    key = data.get("searchword")
    if 'http://www.szcredit.com.cn/' in url:
        #html = curl(url).decode('gbk').encode('utf8')
        rid = re.search('rid=(.+)',url).group(1)
        print rid
        # NOTE(review): kwargs.get('data') is always None here — a keyword
        # argument named 'data' binds to the positional parameter, never
        # to **kwargs — so run() will regex over None and crash. Confirm
        # the intended source of the page HTML (likely the commented-out
        # curl above).
        return run(rid,kwargs.get('data'),searchword=key,type=1)
    if 'http://121.8.226.101:7001/' in url:
        #html = curl(url)
        print data
        return run2(url,searchword=key,type=1)
    else:
        url = url.replace('../','http://gsxt.gdgs.gov.cn/aiccips/')
        html = curl(url)
        return run1(html,searchword=key,type=1)
#print verify(u'内蒙古鄂尔多斯投资控股集团有限公司')
if __name__ == '__main__':
    # Manual smoke test: scrape one company and pretty-print the JSON.
    # (The Chinese comments below are alternative test company names.)
    #广东电网揭阳揭东供电局
    #print verify('广东新南方集团有限公司')  #广东新南方集团深圳投资有限公司
    #中海投资（深圳）有限公司
    print json.dumps(search2(u'广州丽滋摩塔贸易有限公司'),ensure_ascii=False,indent=4)
    #print json.dumps(search2(u'东莞市锐国五金制品有限公司'),ensure_ascii=False,indent=4)
################################################################
    # Batch-mode variant: pull names from MongoDB and scrape each one.
    # from pymongo import MongoClient
    # client = MongoClient('192.168.31.121')
    # db = client.crawler_company_name
    # co = db.companyName
    # for i in co.find({"province":"gd"}).skip(50):
    #     print i
    #     name  = i['companyName']
    #     print '*'*100
    #     print name
    #     print '*'*100
    #     print json.dumps(search2(name),ensure_ascii=False,indent=4)